import sys
IN_COLAB = "google.colab" in sys.modules
if IN_COLAB:
!pip install --quiet scvi-colab
from scvi_colab import install
install()
!pip install --quiet git+https://github.com/BayraktarLab/cell2location#egg=cell2location[tutorials]
import scanpy as sc
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import cell2location
from matplotlib import rcParams
rcParams['pdf.fonttype'] = 42 # enables correct plotting of text for PDFs
### setting ###
st_sample = "E12.5_slice13"
sc_sample = "sc_E12"
out_path = "/media/biogenger/D/Projects/CMY/Analysis/mouse_lung/Overall/Fig2/Deconvolution/Cell2Location/result"
# read the seurat object
tmp = "/media/server/data/LJJ/Cell2Location/srat.merge.annotated.h5ad"
adata_st = sc.read_h5ad(tmp)
adata_st.X = adata_st.raw.X.copy()
del adata_st.raw
# find mitochondria-encoded (MT) genes
adata_st.var["useless_gene"] = [gene.startswith("mt-") for gene in adata_st.var.index] or [gene.startswith("Hba-") for gene in adata_st.var.index] or [gene.startswith("Hbb-") for gene in adata_st.var.index]
# remove MT genes for spatial mapping (keeping their counts in the object)
adata_st.obsm["useless_gene"] = adata_st[:, adata_st.var["useless_gene"].values].X.toarray()
adata_st = adata_st[:, ~adata_st.var["useless_gene"].values]
# read the seurat object
tmp = "/media/server/data/LJJ/Cell2Location/"+sc_sample+"/h5ad/srat.merge.annotated.h5ad"
adata_sc = sc.read_h5ad(tmp)
adata_sc.X = adata_sc.raw.X.copy()
del adata_sc.raw
from cell2location.utils.filtering import filter_genes
selected = filter_genes(adata_sc, cell_count_cutoff=5, cell_percentage_cutoff2=0.05, nonz_mean_cutoff=1.1)
# filter the object
adata_sc = adata_sc[:, selected].copy()
# prepare anndata for the regression model
cell2location.models.RegressionModel.setup_anndata(adata=adata_sc, labels_key="split_reanno_ctp")
# create the regression model
from cell2location.models import RegressionModel
mod = RegressionModel(adata_sc)
# view anndata_setup as a sanity check
mod.view_anndata_setup()
mod.train(max_epochs=250, batch_size=2500, train_size=1, use_gpu=True)
mod.plot_history(20)
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_sc = mod.export_posterior(adata_sc, sample_kwargs={'num_samples': 1000, 'batch_size': 2500, 'use_gpu': True})
# adata_sc = mod.export_posterior(adata_sc, use_quantiles=True, add_to_varm=["means", "stds", "q05","q50", "q95", "q0001"], sample_kwargs={'batch_size': 2500, 'use_gpu': True})
import os
os.makedirs(os.path.join(out_path, st_sample, "reference_signatures"), )
# Save anndata object with results
adata_sc.write(os.path.join(out_path, st_sample, "reference_signatures", "sc.h5ad"))
# adata_sc = sc.read_h5ad(os.path.join(out_path, st_sample, "reference_signatures", "sc.h5ad"))
# Save model
mod.save(os.path.join(out_path, st_sample, "reference_signatures"), overwrite=True)
# mod = cell2location.models.RegressionModel.load(os.path.join(out_path, st_sample, "reference_signatures"), adata_sc)
mod.plot_QC()
# export estimated expression in each cluster
if 'means_per_cluster_mu_fg' in adata_sc.varm.keys():
inf_aver = adata_sc.varm['means_per_cluster_mu_fg'][[f'means_per_cluster_mu_fg_{i}' for i in adata_sc.uns['mod']['factor_names']]].copy()
else:
inf_aver = adata_sc.var[[f'means_per_cluster_mu_fg_{i}' for i in adata_sc.uns['mod']['factor_names']]].copy()
inf_aver.columns = adata_sc.uns['mod']['factor_names']
inf_aver.iloc[0:5, 0:5]
##################################
# Cell2location: spatial mapping #
##################################
adata_vis = adata_st[adata_st.obs["orig.ident"].isin([st_sample])].copy()
# find shared genes and subset both anndata and reference signatures
intersect = np.intersect1d(adata_vis.var_names, inf_aver.index)
adata_vis = adata_vis[:, intersect].copy()
inf_aver = inf_aver.loc[intersect, :].copy()
# prepare anndata for cell2location model
cell2location.models.Cell2location.setup_anndata(adata=adata_vis, labels_key="split_ctp")
# create and train the model
mod = cell2location.models.Cell2location(
adata_vis, cell_state_df=inf_aver,
# the expected average cell abundance: tissue-dependent
# hyper-prior which can be estimated from paired histology:
N_cells_per_location=4,
# hyperparameter controlling normalisation of
# within-experiment variation in RNA detection:
detection_alpha=20
)
mod.view_anndata_setup()
mod.train(max_epochs=30000, batch_size=None, train_size=1, use_gpu=True)
# plot ELBO loss history during training, removing first 100 epochs from the plot
mod.plot_history(1000)
plt.legend(labels=["full data training"])
# In this section, we export the estimated cell abundance (summary of the posterior distribution).
adata_vis = mod.export_posterior(adata_vis, sample_kwargs={'num_samples': 1000, 'batch_size': mod.adata.n_obs, 'use_gpu': True})
os.makedirs(os.path.join(out_path, st_sample, "cell2location_map"))
# Save anndata object with results
adata_vis.write(os.path.join(out_path, st_sample, "cell2location_map", "sp.h5ad"))
# adata_vis = sc.read_h5ad(os.path.join(out_path, st_sample, "cell2location_map", "sp.h5ad"))
# Save model
mod.save(os.path.join(out_path, st_sample, "cell2location_map"), overwrite=True)
# mod = cell2location.models.Cell2location.load(os.path.join(out_path, st_sample, "cell2location_map"), adata_vis)
mod.plot_QC()
# add 5% quantile, representing confident cell abundance, 'at least this amount is present',
# to adata.obs with nice names for plotting
adata_vis.obs[adata_vis.uns["mod"]["factor_names"]] = adata_vis.obsm["q05_cell_abundance_w_sf"]
# select one slide
from cell2location.utils import select_slide
slide = select_slide(adata_vis, s=st_sample, batch_key="orig.ident")
# plot in spatial coordinates
with mpl.rc_context({"axes.facecolor": "black", "figure.figsize": [5, 5.5]}):
# adata_sc.obs["split_reanno_ctp"].unique().tolist()
sc.pl.spatial(slide, cmap="magma", color=adata_sc.obs["split_reanno_ctp"].unique().tolist(),
ncols=4, size=20, img_key="hires",
# limit color scale at 99.2% quantile of cell abundance
vmin=0, vmax="p99.2")
# Now we use cell2location plotter that allows showing multiple cell types in one panel
from cell2location.plt import plot_spatial
# select up to 6 clusters
clust_labels = ["Wnt2+ FB", "gCap", "Myofibroblast"]
clust_col = ["" + str(i) for i in clust_labels] # in case column names differ from labels
slide = select_slide(adata_vis, st_sample, batch_key="orig.ident")
with mpl.rc_context({"figure.figsize": (10, 10)}):
fig = plot_spatial(
adata=slide,
# labels to show on a plot
color=clust_col, labels=clust_labels,
show_img=True,
# 'fast' (white background) or 'dark_background'
style="fast",
# limit color scale at 99.2% quantile of cell abundance
max_color_quantile=0.992,
# size of locations (adjust depending on figure size)
circle_diameter=6,
colorbar_position="right"
)
adata_vis.obsm["q05_cell_abundance_w_sf"].to_csv(os.path.join(out_path, st_sample, "q05_cell_abundance_w_sf.csv"))